Preparing data

# res2023[1,]


# Education labels in the rank order used as levels of the ordered
# `Education` factor (first entry ranks lowest, last entry ranks highest).
education_order <- c(
    # No formal schooling / uncategorised answers
    "I never completed any formal education", "Something else",
    # School
    "Primary/elementary school", "Secondary school",
    # Post-secondary study below a bachelor's degree
    "Associate degree",
    "Some college/university study without earning a degree",
    # University degrees
    "Bachelor’s degree", "Master’s degree", "Professional degree"
)


library("plotly")
library("dplyr")
library("tidyr")
library("stringr")
library("ggplot2")



# Raw 2018 survey export. read.csv() already defaults to comma-separated
# input with a header row, so those arguments are not spelled out.
res2018raw <- read.csv(file = "./developer_surveys/survey_results_2018.csv")

# convert salary to numeric







# 2018 respondents with a usable salary: keep the columns analysed below,
# coerce salary to numeric and drop rows whose salary is missing or zero.
res2018 <- res2018raw %>%
    select(
        FormalEducation,
        DevType,
        Salary = ConvertedSalary,
        Experience = YearsCoding,
        AdmiredLanguage = LanguageDesireNextYear
    ) %>%
    mutate(Salary = as.numeric(Salary)) %>%
    filter(!is.na(Salary), Salary != 0) %>%
    mutate(
        # Keep only the text before the first "(" and trim whitespace.
        Education = str_trim(str_extract(FormalEducation, "^[^(]+")),
        # "Other doctoral degree" is not one of the levels in
        # `education_order`; it is folded into "Professional degree".
        Education = ifelse(
            Education == "Other doctoral degree",
            "Professional degree",
            Education
        ),
        Education = factor(Education, levels = education_order, ordered = TRUE)
    )

# Number of respondents kept.
res2018 %>% count()
##       n
## 1 46860
# Raw 2023 survey export (comma-separated with a header row, which are the
# read.csv() defaults).
res2023raw <- read.csv(file = "./developer_surveys/survey_results_2023.csv")

# 2023 columns renamed to match the 2018 table, plus the ordered Education
# factor built from the text before the first "(" in EdLevel.
# NOTE(review): unlike res2018, Salary is neither coerced to numeric nor
# filtered for missing/zero values here - confirm this is intended.
res2023 <- res2023raw %>%
    select(
        EdLevel,
        DevType,
        Salary = ConvertedCompYearly,
        Experience = YearsCode
    ) %>%
    mutate(
        Education = factor(
            str_trim(str_extract(EdLevel, "^[^(]+")),
            levels = education_order,
            ordered = TRUE
        )
    )

How income is distributed by education level and developer type

# Median 2018 salary for every (education level, developer type) pair.
# DevType holds several ";"-separated roles per respondent, so each
# respondent contributes one row per role. The result stays grouped by
# Education because summarise() only peels off the last grouping variable.
education_devtype_salary <- res2018 %>%
    filter(
        !is.na(DevType),
        !is.na(Education),
        Education != "I never completed any formal education"
    ) %>%
    separate_rows(DevType, sep = ";") %>%
    group_by(Education, DevType) %>%
    summarise(MedSalary = median(Salary))
## `summarise()` has grouped output by 'Education'. You can override
## using the `.groups` argument.
# Median salary across education levels, one line per developer type.
#
# The previous `scale_fill_brewer(palette = "Blues")` layer was removed: only
# `color` is mapped in this plot, so a fill scale never influenced the lines.
# NOTE(review): `scale_color_brewer(palette = "Blues")` is not a drop-in
# replacement - that palette offers at most 9 colours, so confirm the number
# of distinct DevType values before adding a brewer colour scale.
g <- ggplot(
    data = education_devtype_salary,
    aes(x = Education, y = MedSalary, group = DevType, color = DevType)
) +
    geom_line()

# Render the static ggplot as an interactive plotly widget.
ggplotly(g)

How experience relates to salary

Bar chart: experience on the x-axis, median salary on the y-axis

# Ordered experience bands: three-year ranges "0-2" through "27-29",
# followed by the open-ended top band.
experience_levels <- c(
    paste(seq(0, 27, by = 3), seq(2, 29, by = 3), sep = "-"),
    "30 or more"
)


# Map years of coding experience onto the three-year bands "0-2" ... "27-29"
# plus the open-ended "30 or more".
#
# Arguments:
#   experience  Numeric or character vector of years of experience.
#
# Returns:
#   A character vector of band labels, the same length as `experience`.
#   Missing, negative or unparseable values yield NA instead of being
#   silently counted as "30 or more" (which is what a catch-all branch did
#   for every NA produced by the numeric coercion).
#
# Fractional values are assigned to the band of their whole-year part
# (2.5 -> "0-2"), so there are no gaps between bands.
discretize_experience <- function(experience) {
    band_labels <- c(
        "0-2", "3-5", "6-8", "9-11",
        "12-14", "15-17", "18-20", "21-23",
        "24-26", "27-29", "30 or more"
    )

    years_text <- trimws(as.character(experience))

    # Textual answers for the two extremes would otherwise be coerced to NA.
    # NOTE(review): these labels are assumed to be the survey's wording for
    # the lowest and highest answers - verify against the raw column.
    years_text[years_text %in% "Less than 1 year"] <- "0"
    years_text[years_text %in% "More than 50 years"] <- "51"

    # Anything still non-numeric becomes NA (with the usual coercion warning,
    # which now only fires for genuinely unexpected values).
    years <- as.numeric(years_text)

    # Lower bounds of the bands: 0, 3, ..., 27, 30. findInterval() returns the
    # 1-based band index, 0 for values below the first bound and NA for NA.
    band <- findInterval(years, seq(0, 30, by = 3))
    band[which(band == 0L)] <- NA_integer_

    band_labels[band]
}


# Median 2018 salary per experience band. The 2018 answers are already
# banded; stripping " years" turns them into the labels listed in
# `experience_levels`.
exp_sal18 <- res2018 %>%
    select(Experience, Salary) %>%
    na.omit() %>%
    mutate(
        Experience = factor(
            gsub(" years", "", Experience),
            levels = experience_levels,
            ordered = TRUE
        )
    ) %>%
    group_by(Experience) %>%
    summarise(MedSalary = median(Salary), year = "2018")
exp_sal18
## # A tibble: 11 × 3
##    Experience MedSalary year 
##    <ord>          <dbl> <chr>
##  1 0-2           15876  2018 
##  2 3-5           30597  2018 
##  3 6-8           45283  2018 
##  4 9-11          55812  2018 
##  5 12-14         67706. 2018 
##  6 15-17         77104  2018 
##  7 18-20         85000  2018 
##  8 21-23         95968  2018 
##  9 24-26         96000  2018 
## 10 27-29         96626  2018 
## 11 30 or more   105253  2018
# Median 2023 salary per experience band. The 2023 answers are individual
# year values, so they are first binned with discretize_experience().
exp_sal23 <- res2023 %>%
    select(Experience, Salary) %>%
    na.omit() %>%
    mutate(
        Experience = discretize_experience(Experience),
        Experience = factor(Experience, levels = experience_levels, ordered = TRUE)
    ) %>%
    group_by(Experience) %>%
    summarise(MedSalary = median(Salary), year = "2023")
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Experience = discretize_experience(Experience)`.
## Caused by warning in `discretize_experience()`:
## ! NAs introduced by coercion
exp_sal23
## # A tibble: 11 × 3
##    Experience MedSalary year 
##    <ord>          <dbl> <chr>
##  1 0-2            19276 2023 
##  2 3-5            39724 2023 
##  3 6-8            53545 2023 
##  4 9-11           68276 2023 
##  5 12-14          77104 2023 
##  6 15-17          83173 2023 
##  7 18-20          89222 2023 
##  8 21-23          96657 2023 
##  9 24-26         105548 2023 
## 10 27-29         110000 2023 
## 11 30 or more    106000 2023
# Combine both survey years into one long table; the `year` column differs
# between the two inputs, so no rows coincide.
exp_sal <- exp_sal18 %>%
    union(exp_sal23)
exp_sal
## # A tibble: 22 × 3
##    Experience MedSalary year 
##    <ord>          <dbl> <chr>
##  1 0-2           15876  2018 
##  2 3-5           30597  2018 
##  3 6-8           45283  2018 
##  4 9-11          55812  2018 
##  5 12-14         67706. 2018 
##  6 15-17         77104  2018 
##  7 18-20         85000  2018 
##  8 21-23         95968  2018 
##  9 24-26         96000  2018 
## 10 27-29         96626  2018 
## # ℹ 12 more rows
## con

# Side-by-side bars of median salary per experience band, one bar per year.
ggplot(exp_sal) +
    geom_bar(
        aes(x = Experience, y = MedSalary, fill = year, color = year),
        stat = "identity",
        position = position_dodge(),
        width = 0.45
    )

# facet_wrap(~year)

Inflation plot

With inflation

Are admired languages well paid?

# Per admired language (2018): median salary of the respondents naming it and
# how many respondents named it. AdmiredLanguage is a ";"-separated list, so
# each respondent contributes one row per language.
admired18 <- res2018 %>%
    separate_rows(AdmiredLanguage, sep = ";") %>%
    group_by(AdmiredLanguage) %>%
    summarise(
        SalaryMed = median(Salary),
        AdmiredLanguageCount = n()
    )

# Popularity (number of respondents naming the language) against median
# salary, one point per admired language.
#
# geom_point() replaces geom_jitter(): every language is a single exact
# summary value, and jitter adds random noise to both coordinates, so point
# positions and the values shown on hover were slightly off and changed on
# every render.
g <- ggplot(
    admired18,
    aes(
        x = AdmiredLanguageCount,
        y = SalaryMed,
        color = AdmiredLanguage,
        fill = AdmiredLanguage
    )
) +
    geom_point()

# Render the static ggplot as an interactive plotly widget.
ggplotly(g)

Admired salary - language

Is it better to work in big companies?

Subplots by age bin, comparing salaries across company sizes

Are differences statistically significant?

Where to work?

Map of the world with colored salaries